PE-sieve
Scans all running processes. Recognizes and dumps a variety of potentially malicious implants (replaced/implanted PEs, shellcodes, hooks, in-memory patches).
Toggle main menu visibility
Loading...
Searching...
No Matches
stats
stats_analyzer.cpp
Go to the documentation of this file.
1
#include "
stats_analyzer.h
"
2
3
#include "
std_dev_calc.h
"
4
5
// Entropy thresholds consulted by the rule matchers defined later in this file.
// ObfuscatedMatcher requires the area's entropy to exceed this value.
#define ENTROPY_DATA_TRESHOLD 3.0
6
// CodeMatcher rejects areas whose entropy is below this value
// (currently identical to the data threshold).
#define ENTROPY_CODE_TRESHOLD ENTROPY_DATA_TRESHOLD
7
// EncryptedMatcher needs entropy above this value before it runs its frequency-spread check;
// ObfuscatedMatcher also uses it while filtering out text-like areas.
#define ENTROPY_ENC_TRESHOLD 6.0
8
// Entropy above this value is by itself enough for EncryptedMatcher to report a match.
#define ENTROPY_STRONG_ENC_TRESHOLD 7.0
9
10
// ObfuscatedMatcher requires at least CHARSET_SIZE / 3 distinct byte values in the histogram.
// NOTE(review): a byte has 256 possible values while this constant is 255 - confirm that 0xFF is intended.
#define CHARSET_SIZE 0xFF
11
12
namespace
pesieve
{
13
14
using namespace
pesieve::stats
;
15
16
/// Returns the share of the current area taken by the byte `val`:
/// its histogram entry divided by the area size, or 0 when the byte
/// has no histogram entry.
double getValRatio(IN const AreaMultiStats& stats, BYTE val)
{
    const auto& histogram = stats.currArea.histogram;
    const auto entry = histogram.find(val);
    if (entry == histogram.end()) {
        return 0; // the byte was never recorded for this area
    }
    return static_cast<double>(entry->second) / static_cast<double>(stats.currArea.size);
}
26
27
/// Returns the fraction of the current area made of bytes accepted by
/// IS_PRINTABLE, or 0 for an empty area.
double pesieve::stats::getPrintableRatio(IN const AreaMultiStats& stats)
{
    const auto& area = stats.currArea;
    if (area.size == 0) {
        return 0;
    }

    // Sum the histogram counts of every printable byte value.
    size_t printableBytes = 0;
    for (const auto& entry : area.histogram) {
        const BYTE byteVal = entry.first;
        const size_t occurrences = entry.second;
        if (IS_PRINTABLE(byteVal)) {
            printableBytes += occurrences;
        }
    }
    return static_cast<double>(printableBytes) / static_cast<double>(area.size);
}
41
42
/// Counts how many entries of `ratios` are satisfied by the current area.
/// Each entry maps a byte value to the minimal ratio (see getValRatio) that
/// the byte must reach; every entry that is reached scores one point.
size_t checkRatios(IN const AreaMultiStats& stats, IN std::map<BYTE, double>& ratios)
{
    size_t reached = 0;
    for (const auto& expected : ratios) {
        const BYTE byteVal = expected.first;
        const double actualRatio = getValRatio(stats, byteVal);
        if (actualRatio >= expected.second) {
#ifdef DISPLAY_STATS
            std::cout << "[+] OK " << std::hex << static_cast<UINT>(byteVal) << std::dec << " : " << actualRatio << "\n";
#endif
            ++reached;
        }
    }
    return reached;
}
58
59
/// Counts how many of `neededStrings` were found in the current area
/// at least `minOccurrence` times. Each qualifying string counts once,
/// regardless of how often it occurred.
size_t countFoundStrings(IN const AreaMultiStats& stats, IN const std::set<std::string>& neededStrings, IN size_t minOccurrence)
{
    const auto& foundStrings = stats.currArea.foundStrings;
    if (foundStrings.size() == 0) {
        return 0; // nothing was collected for this area
    }

    size_t matchedStrings = 0;
    for (const std::string& wanted : neededStrings) {
        const auto hit = foundStrings.find(wanted);
        if (hit != foundStrings.end()) {
            const size_t occurrences = hit->second;
            if (occurrences >= minOccurrence) {
                ++matchedStrings;
            }
        }
    }
    return matchedStrings;
}
79
80
/// Collects the "peak" byte values of an area: all values whose occurrence
/// count lies within `devCount * stdDev` of the highest occurrence count.
///
/// @param currArea  statistics of the area; `frequencies` maps an occurrence
///                  count to the set of byte values having that count.
/// @param stdDev    standard deviation used as the distance unit.
/// @param devCount  how many deviations below the top count still qualify.
/// @param peaks     receives the qualifying byte values (existing content is kept).
/// @return the number of byte values visited as peaks; 0 when the area is
///         empty or has no frequency data.
size_t pesieve::stats::fetchPeakValues(IN const ChunkStats& currArea, IN double stdDev, int devCount, OUT std::set<BYTE>& peaks)
{
    // Dereferencing rbegin() of an empty map is undefined, so check both.
    if (!currArea.size || currArea.frequencies.empty()) {
        return 0;
    }

    const size_t peakVal = currArea.frequencies.rbegin()->first;
    const double maxDistance = devCount * stdDev;

    size_t peaksCount = 0;
    // Walk from the most frequent count downwards until the distance grows too large.
    for (auto itr = currArea.frequencies.rbegin(); itr != currArea.frequencies.rend(); ++itr) {
        const size_t counter = itr->first;
        const double diff = static_cast<double>(peakVal) - static_cast<double>(counter);
        if (diff > maxDistance) {
            break;
        }
        const std::set<BYTE>& vals = itr->second; // reference: no per-iteration copy
        peaksCount += vals.size();
        peaks.insert(vals.begin(), vals.end());
    }
    return peaksCount;
}
98
99
/// Counts the byte values whose occurrence count is not below `mean`.
/// `frequencies` is walked from the highest count downwards and the walk
/// stops at the first count that falls below the mean.
size_t pesieve::stats::valuesNotBelowMean(IN const ChunkStats& currArea, double mean)
{
    size_t total = 0;
    auto it = currArea.frequencies.rbegin();
    const auto last = currArea.frequencies.rend();
    while (it != last && static_cast<double>(it->first) >= mean) {
        total += it->second.size();
        ++it;
    }
    return total;
}
113
};
114
115
116
//--
117
118
/// Adds the short ASCII patterns used by the code-detection rule to `codeStrings`.
///
/// @param codeStrings  set receiving the patterns (existing content is kept).
/// @return the size of `codeStrings` after the insertion.
size_t pesieve::stats::fillCodeStrings(OUT std::set<std::string>& codeStrings)
{
    // The array length is deduced from the initializer, so adding or removing
    // a pattern cannot desynchronize the loop from the table (previously one
    // constant served both as the string buffer width and as the entry count).
    const char* const patterns[] = {
        "WVS",
        "SVW",
        "D$",
        "AQ",
        "AX",
        "UWV",
        "[^_]",
        "ZX[]"
    };
    for (const char* pattern : patterns) {
        codeStrings.insert(pattern);
    }
    return codeStrings.size();
}
136
137
//---
138
namespace
pesieve
{
139
140
class
CodeMatcher
:
public
RuleMatcher
141
{
142
public
:
143
CodeMatcher
()
144
:
RuleMatcher
(
CODE_RULE
)
145
{
146
}
147
148
virtual
bool
_isMatching
(IN
const
AreaMultiStats
&
stats
)
149
{
150
const
size_t
kMinCodePoints = 2;
151
const
size_t
kMinStrPoints = 2;
152
153
double
entropy =
stats
.currArea.entropy;
154
if
(entropy <
ENTROPY_CODE_TRESHOLD
)
return
false
;
155
156
#ifdef DISPLAY_STATS
157
std::cout <<
"FOUND strings: "
<<
stats
.currArea.foundStrings.size() <<
"\n"
;
158
159
for
(
auto
itr =
stats
.currArea.foundStrings.begin(); itr !=
stats
.currArea.foundStrings.end(); ++itr)
160
{
161
const
std::string& codeStr = itr->first;
162
size_t
count = itr->second;
163
std::cout <<
"---->>> FOUND Str "
<< codeStr <<
" count: "
<< count <<
"\n"
;
164
}
165
#endif
166
std::set<std::string> codeStrings;
167
fillCodeStrings
(codeStrings);
168
169
size_t
strPoints =
countFoundStrings
(
stats
, codeStrings, 1);
170
#ifdef DISPLAY_STATS
171
std::cout <<
"---->>> STR points: "
<< strPoints <<
"\n"
;
172
#endif
173
if
(codeStrings.size() && !strPoints) {
174
return
false
;
175
}
176
// possible code
177
size_t
ratiosPoints = 0;
178
std::map<BYTE, double> ratios;
179
ratios[0x00] = 0.1;
180
ratios[0x0F] = 0.01;
181
ratios[0x48] = 0.02;
182
ratios[0x8B] = 0.02;
183
ratios[0xCC] = 0.01;
184
ratios[0xE8] = 0.01;
185
ratios[0xFF] = 0.02;
186
187
ratiosPoints +=
checkRatios
(
stats
, ratios);
188
#ifdef DISPLAY_STATS
189
std::cout <<
"---->>> CODE points: "
<< ratiosPoints <<
"\n"
;
190
#endif
191
if
(ratiosPoints < kMinCodePoints) {
192
return
false
;
193
}
194
if
(ratiosPoints >= (ratios.size() / 2 + 1)) {
195
return
true
;
196
}
197
if
(strPoints < kMinStrPoints) {
198
return
false
;
199
}
200
return
true
;
201
}
202
};
203
204
205
class
ObfuscatedMatcher
:
public
RuleMatcher
206
{
207
public
:
208
ObfuscatedMatcher
()
209
:
RuleMatcher
(
"possible_obfuscated"
) {}
210
211
virtual
bool
_isMatching
(IN
const
AreaMultiStats
&
stats
)
212
{
213
const
double
kMinNBRatio = 0.17;
214
BYTE mFreqVal =
getMostFrequentValue
(
stats
.currArea.frequencies);
215
double
entropy =
stats
.currArea.entropy;
216
const
size_t
populationSize =
stats
.currArea.histogram.size();
217
218
if
(populationSize < (
CHARSET_SIZE
/ 3)) {
219
return
false
;
220
}
221
bool
entropyT = (mFreqVal != 0 && entropy >
ENTROPY_DATA_TRESHOLD
);
// possible XOR obfuscation, or block cipher
222
if
(!entropyT) {
223
return
false
;
224
}
225
226
StdDeviationCalc
dev(
stats
.currArea.histogram, populationSize);
227
const
double
mean = dev.
getMean
();
228
const
size_t
nB =
valuesNotBelowMean
(
stats
.currArea, mean);
229
const
double
nBRatio = (double)nB / (
double
)populationSize;
230
if
(nBRatio > 0.5) {
231
return
true
;
// possible strong encryption
232
}
233
234
// filter out texts:
235
const
double
printRatio =
getPrintableRatio
(
stats
);
236
if
(printRatio > 0.8) {
237
return
false
;
238
}
239
if
(entropy < ENTROPY_ENC_TRESHOLD && printRatio > 0.6) {
240
return
false
;
241
}
242
double
stDev = dev.
calcSampleStandardDeviation
();
243
/*
244
const size_t topVal = stats.currArea.frequencies.rbegin()->first;
245
const size_t bottomVal = stats.currArea.frequencies.begin()->first;
246
double diff = topVal - bottomVal;
247
248
double valSpread = diff / stDev;
249
*/
250
std::set<BYTE>peaks;
251
size_t
peaksCount =
fetchPeakValues
(
stats
.currArea, stDev, 2, peaks);
252
double
peaksRatio = (double)peaksCount / (
double
)populationSize;
253
if
(peaksRatio > 0.4) {
// possible strong encryption
254
return
true
;
255
}
256
if
(peaks.find(0) == peaks.end()) {
257
// 0 is not among the peaks:
258
return
true
;
259
}
260
if
(nBRatio < kMinNBRatio) {
261
return
false
;
262
}
263
#ifdef DISPLAY_STATS
264
std::cout <<
"All peaks: \n"
;
265
for
(
auto
itr = peaks.begin(); itr != peaks.end(); itr++) {
266
std::cout << std::hex << (UINT)*itr <<
" "
;
267
}
268
std::cout <<
"\n"
;
269
#endif
270
return
false
;
271
}
272
};
273
274
275
class
EncryptedMatcher
:
public
RuleMatcher
276
{
277
public
:
278
EncryptedMatcher
()
279
:
RuleMatcher
(
"possible_encrypted"
) {}
280
281
virtual
bool
_isMatching
(IN
const
AreaMultiStats
&
stats
)
282
{
283
double
entropy =
stats
.currArea.entropy;
284
const
BYTE mFreqVal =
getMostFrequentValue
(
stats
.currArea.frequencies);
285
bool
fullAreaEncrypted = (entropy >
ENTROPY_STRONG_ENC_TRESHOLD
);
// strong encryption
286
if
(mFreqVal != 0 && entropy >
ENTROPY_ENC_TRESHOLD
) {
287
if
(
stats
.currArea.frequencies.size() > 1) {
288
auto
fItr =
stats
.currArea.frequencies.begin();
// first one
289
auto
eItr =
stats
.currArea.frequencies.rbegin();
// last one
290
// most common - least common ratio
291
double
diff = ((double)(eItr->first - fItr->first)) / (
double
)
stats
.currArea.size;
292
//std::cout << "RATIO : " << fItr->first << " VS " << eItr->first << " DIFF: " << diff << "\n";
293
if
(diff < 0.01) {
294
fullAreaEncrypted =
true
;
295
}
296
}
297
}
298
return
fullAreaEncrypted;
299
}
300
};
301
302
class
TextMatcher
:
public
RuleMatcher
303
{
304
public
:
305
TextMatcher
()
306
:
RuleMatcher
(
"possible_text"
) {}
307
308
virtual
bool
_isMatching
(IN
const
AreaMultiStats
&
stats
)
309
{
310
bool
possibleText =
false
;
311
const
double
printRatio =
getPrintableRatio
(
stats
);
312
if
(printRatio > 0.8) {
313
possibleText =
true
;
314
}
315
return
possibleText;
316
}
317
};
318
319
//---
320
321
/// Appends one matcher to `matchers` for every rule bit set in `ruleTypes`.
/// Matchers are added in the order: code, text, encrypted, obfuscated.
void RuleMatchersSet::initRules(DWORD ruleTypes)
{
    const bool wantCode = (ruleTypes & RuleMatcher::RULE_CODE) != 0;
    const bool wantText = (ruleTypes & RuleMatcher::RULE_TEXT) != 0;
    const bool wantEncrypted = (ruleTypes & RuleMatcher::RULE_ENCRYPTED) != 0;
    const bool wantObfuscated = (ruleTypes & RuleMatcher::RULE_OBFUSCATED) != 0;

    if (wantCode) {
        this->matchers.push_back(new CodeMatcher());
    }
    if (wantText) {
        this->matchers.push_back(new TextMatcher());
    }
    if (wantEncrypted) {
        this->matchers.push_back(new EncryptedMatcher());
    }
    if (wantObfuscated) {
        this->matchers.push_back(new ObfuscatedMatcher());
    }
}
336
337
/// Runs every registered matcher against `stats` and records the names of
/// the rules that matched.
///
/// @param stats  statistics of the area to classify; must be filled.
/// @param info   receives the name of each matching rule in `matchedRules`.
/// @return the number of matching rules; 0 when `stats` is not filled.
size_t RuleMatchersSet::findMatches(IN const AreaMultiStats& stats, OUT AreaInfo& info)
{
    if (!stats.isFilled()) {
        std::cout << "Stat not filled!\n";
        return 0; // the return type is a count, not a bool
    }

    size_t matched = 0;
    for (RuleMatcher* matcher : matchers) {
        if (!matcher) {
            continue; // skip empty slots
        }
        if (matcher->isMatching(stats)) {
            info.matchedRules.push_back(matcher->name);
            ++matched;
        }
    }
    return matched;
}
355
356
};
//namespace pesieve
pesieve::AreaMultiStats
Definition
multi_stats.h:190
pesieve::CodeMatcher
Definition
stats_analyzer.cpp:141
pesieve::CodeMatcher::CodeMatcher
CodeMatcher()
Definition
stats_analyzer.cpp:143
pesieve::CodeMatcher::_isMatching
virtual bool _isMatching(IN const AreaMultiStats &stats)
Definition
stats_analyzer.cpp:148
pesieve::EncryptedMatcher
Definition
stats_analyzer.cpp:276
pesieve::EncryptedMatcher::_isMatching
virtual bool _isMatching(IN const AreaMultiStats &stats)
Definition
stats_analyzer.cpp:281
pesieve::EncryptedMatcher::EncryptedMatcher
EncryptedMatcher()
Definition
stats_analyzer.cpp:278
pesieve::ObfuscatedMatcher
Definition
stats_analyzer.cpp:206
pesieve::ObfuscatedMatcher::ObfuscatedMatcher
ObfuscatedMatcher()
Definition
stats_analyzer.cpp:208
pesieve::ObfuscatedMatcher::_isMatching
virtual bool _isMatching(IN const AreaMultiStats &stats)
Definition
stats_analyzer.cpp:211
pesieve::RuleMatcher
Definition
stats_analyzer.h:30
pesieve::RuleMatcher::isMatching
bool isMatching(IN const AreaMultiStats &stats)
Definition
stats_analyzer.h:47
pesieve::RuleMatcher::RULE_CODE
@ RULE_CODE
Definition
stats_analyzer.h:36
pesieve::RuleMatcher::RULE_TEXT
@ RULE_TEXT
Definition
stats_analyzer.h:37
pesieve::RuleMatcher::RULE_ENCRYPTED
@ RULE_ENCRYPTED
Definition
stats_analyzer.h:39
pesieve::RuleMatcher::RULE_OBFUSCATED
@ RULE_OBFUSCATED
Definition
stats_analyzer.h:38
pesieve::RuleMatcher::RuleMatcher
RuleMatcher(std::string _name)
Definition
stats_analyzer.h:42
pesieve::RuleMatcher::name
std::string name
Definition
stats_analyzer.h:58
pesieve::TextMatcher
Definition
stats_analyzer.cpp:303
pesieve::TextMatcher::TextMatcher
TextMatcher()
Definition
stats_analyzer.cpp:305
pesieve::TextMatcher::_isMatching
virtual bool _isMatching(IN const AreaMultiStats &stats)
Definition
stats_analyzer.cpp:308
pesieve::stats::StdDeviationCalc
Definition
std_dev_calc.h:11
pesieve::stats::StdDeviationCalc::calcSampleStandardDeviation
double calcSampleStandardDeviation()
Definition
std_dev_calc.h:35
pesieve::stats::StdDeviationCalc::getMean
double getMean()
Definition
std_dev_calc.h:22
pesieve::stats
Definition
entropy.h:7
pesieve::stats::valuesNotBelowMean
size_t valuesNotBelowMean(IN const ChunkStats &currArea, double mean)
Definition
stats_analyzer.cpp:99
pesieve::stats::getPrintableRatio
double getPrintableRatio(IN const AreaMultiStats &stats)
Definition
stats_analyzer.cpp:27
pesieve::stats::fillCodeStrings
size_t fillCodeStrings(OUT std::set< std::string > &codeStrings)
Definition
stats_analyzer.cpp:118
pesieve::stats::fetchPeakValues
size_t fetchPeakValues(IN const ChunkStats &currArea, IN double stdDev, int devCount, OUT std::set< BYTE > &peaks)
Definition
stats_analyzer.cpp:80
pesieve::stats::getMostFrequentValue
BYTE getMostFrequentValue(IN const std::map< size_t, std::set< T > > &frequencies)
Definition
stats_util.h:35
pesieve
Definition
pesieve.py:1
pesieve::getValRatio
double getValRatio(IN const AreaMultiStats &stats, BYTE val)
Definition
stats_analyzer.cpp:16
pesieve::checkRatios
size_t checkRatios(IN const AreaMultiStats &stats, IN std::map< BYTE, double > &ratios)
Definition
stats_analyzer.cpp:42
pesieve::info
std::string info()
The string with the basic information about the scanner.
Definition
pe_sieve.cpp:276
pesieve::countFoundStrings
size_t countFoundStrings(IN const AreaMultiStats &stats, IN const std::set< std::string > &neededStrings, IN size_t minOccurrence)
Definition
stats_analyzer.cpp:59
ENTROPY_DATA_TRESHOLD
#define ENTROPY_DATA_TRESHOLD
Definition
stats_analyzer.cpp:5
CHARSET_SIZE
#define CHARSET_SIZE
Definition
stats_analyzer.cpp:10
ENTROPY_CODE_TRESHOLD
#define ENTROPY_CODE_TRESHOLD
Definition
stats_analyzer.cpp:6
ENTROPY_STRONG_ENC_TRESHOLD
#define ENTROPY_STRONG_ENC_TRESHOLD
Definition
stats_analyzer.cpp:8
ENTROPY_ENC_TRESHOLD
#define ENTROPY_ENC_TRESHOLD
Definition
stats_analyzer.cpp:7
stats_analyzer.h
CODE_RULE
#define CODE_RULE
Definition
stats_analyzer.h:11
std_dev_calc.h
IS_PRINTABLE
#define IS_PRINTABLE(c)
Definition
strings_util.h:8
pesieve::AreaInfo
Definition
stats_analyzer.h:70
pesieve::ChunkStats
Statistics from a block of data.
Definition
multi_stats.h:54
pesieve::RuleMatchersSet::initRules
void initRules(DWORD ruleTypes)
Definition
stats_analyzer.cpp:321
pesieve::RuleMatchersSet::findMatches
size_t findMatches(IN const AreaMultiStats &stats, OUT AreaInfo &info)
Definition
stats_analyzer.cpp:337
pesieve::RuleMatchersSet::matchers
std::vector< RuleMatcher * > matchers
Definition
stats_analyzer.h:150
Generated by
1.17.0