PE-sieve
Scans all running processes. Recognizes and dumps a variety of potentially malicious implants (replaced/implanted PEs, shellcodes, hooks, in-memory patches).
Loading...
Searching...
No Matches
stats_analyzer.cpp
Go to the documentation of this file.
1#include "stats_analyzer.h"
2
3#include "std_dev_calc.h"
4
// Entropy thresholds compared against stats.currArea.entropy by the rule matchers in this file.
// NOTE(review): the values presumably lie on a 0..8 bits-per-byte scale - confirm against the entropy calculation.
// Lower bound used by the "possible_obfuscated" rule: the area entropy must be above it.
5#define ENTROPY_DATA_TRESHOLD 3.0
// Lower bound used by the code-detection rule: areas with entropy below it are rejected. Same value as the data threshold.
6#define ENTROPY_CODE_TRESHOLD ENTROPY_DATA_TRESHOLD
// Used by the "possible_encrypted" rule (together with a non-zero most frequent byte) and by the text filter of the "possible_obfuscated" rule.
7#define ENTROPY_ENC_TRESHOLD 6.0
// Above this entropy the "possible_encrypted" rule matches unconditionally.
8#define ENTROPY_STRONG_ENC_TRESHOLD 7.0
9
// The "possible_obfuscated" rule requires at least CHARSET_SIZE / 3 distinct byte values in the histogram.
// NOTE(review): 0xFF is 255 (the highest byte value), not 256 (the number of possible byte values) - confirm this is intended.
10#define CHARSET_SIZE 0xFF
11
12namespace pesieve {
13
14 using namespace pesieve::stats;
15
16 double getValRatio(IN const AreaMultiStats& stats, BYTE val)
17 {
18 auto val_itr = stats.currArea.histogram.find(val);
19 double ratio = 0;
20 if (val_itr != stats.currArea.histogram.end()) {
21 ratio = ((double)val_itr->second / (double)stats.currArea.size);
22 //std::cout << "Val : " << std::hex << (UINT) val << " RATIO: " << ratio << "\n";
23 }
24 return ratio;
25 }
26
// Body of getPrintableRatio(IN const AreaMultiStats& stats), as named in the symbol index at the end of this listing;
// the signature line itself (listing line 27) is not present in this extract.
// Returns the share of the current area taken by byte values for which IS_PRINTABLE holds, or 0 for an empty area.
28 {
// an empty area has no ratio; also avoids dividing by zero below
29 if (!stats.currArea.size) return 0;
// sum of the occurrence counts of all printable byte values
30 size_t total_size = 0;
// the histogram maps a byte value (first) to its number of occurrences (second)
31 for (auto itr = stats.currArea.histogram.begin(); itr != stats.currArea.histogram.end(); ++itr) {
32 BYTE val = itr->first;
33 size_t size = itr->second;
34
35 if (IS_PRINTABLE(val)) {
36 total_size += size;
37 }
38 }
39 return (double)total_size / (double)stats.currArea.size;
40 }
41
42 size_t checkRatios(IN const AreaMultiStats& stats, IN std::map<BYTE, double>& ratios)
43 {
44 size_t points = 0;
45
46 for (auto itr = ratios.begin(); itr != ratios.end(); ++itr) {
47 BYTE val = itr->first;
48 double currRatio = getValRatio(stats, val);
49 if (currRatio >= itr->second) {
50#ifdef DISPLAY_STATS
51 std::cout << "[+] OK " << std::hex << (UINT)val << std::dec << " : " << currRatio << "\n";
52#endif
53 points++;
54 }
55 }
56 return points;
57 }
58
59 size_t countFoundStrings(IN const AreaMultiStats& stats, IN const std::set<std::string> &neededStrings, IN size_t minOccurrence)
60 {
61 size_t totalCount = 0;
62 if (!stats.currArea.foundStrings.size()) {
63 return 0;
64 }
65 for (auto itr = neededStrings.begin(); itr != neededStrings.end(); ++itr)
66 {
67 const std::string& codeStr = *itr;
68 auto found = stats.currArea.foundStrings.find(codeStr);
69 if (found == stats.currArea.foundStrings.end()) {
70 continue;
71 }
72 size_t currCount = found->second;
73 if (currCount >= minOccurrence) {
74 totalCount++;
75 }
76 }
77 return totalCount;
78 }
79
80 size_t pesieve::stats::fetchPeakValues(IN const ChunkStats& currArea, IN double stdDev, int devCount, OUT std::set<BYTE>& peaks)
81 {
82 if (!currArea.size) return 0;
83
84 size_t peaksCount = 0;
85 size_t peakVal = currArea.frequencies.rbegin()->first;
86 size_t i = 0;
87 for (auto itr1 = currArea.frequencies.rbegin(); itr1 != currArea.frequencies.rend(); ++itr1, ++i) {
88 size_t counter = itr1->first;
89 double diff = (double)peakVal - (double)counter;
90 if (diff > (devCount * stdDev)) break;
91
92 std::set<BYTE> vals = itr1->second;
93 peaksCount += vals.size();
94 peaks.insert(vals.begin(), vals.end());
95 }
96 return peaksCount;
97 }
98
99 size_t pesieve::stats::valuesNotBelowMean(IN const ChunkStats& currArea, double mean)
100 {
101 size_t valsCount = 0;
102 for (auto itr1 = currArea.frequencies.rbegin(); itr1 != currArea.frequencies.rend(); ++itr1) {
103 double counter = (double)itr1->first;
104 if (counter >= mean) {
105 valsCount += itr1->second.size();
106 }
107 else {
108 break;
109 }
110 }
111 return valsCount;
112 }
113};
114
115
116//--
117
118size_t pesieve::stats::fillCodeStrings(OUT std::set<std::string>& codeStrings)
119{
120 const size_t patterns_count = 8;
121 char patterns[][patterns_count] = {
122 "WVS",
123 "SVW",
124 "D$",
125 "AQ",
126 "AX",
127 "UWV",
128 "[^_]",
129 "ZX[]"
130 };
131 for (size_t i = 0; i != patterns_count; ++i) {
132 codeStrings.insert(patterns[i]);
133 }
134 return codeStrings.size();
135}
136
137//---
138namespace pesieve {
139
141 {
142 public:
145 {
146 }
147
148 virtual bool _isMatching(IN const AreaMultiStats& stats)
149 {
150 const size_t kMinCodePoints = 2;
151 const size_t kMinStrPoints = 2;
152
153 double entropy = stats.currArea.entropy;
154 if (entropy < ENTROPY_CODE_TRESHOLD) return false;
155
156#ifdef DISPLAY_STATS
157 std::cout << "FOUND strings: " << stats.currArea.foundStrings.size() << "\n";
158
159 for (auto itr = stats.currArea.foundStrings.begin(); itr != stats.currArea.foundStrings.end(); ++itr)
160 {
161 const std::string& codeStr = itr->first;
162 size_t count = itr->second;
163 std::cout << "---->>> FOUND Str " << codeStr << " count: " << count << "\n";
164 }
165#endif
166 std::set<std::string> codeStrings;
167 fillCodeStrings(codeStrings);
168
169 size_t strPoints = countFoundStrings(stats, codeStrings, 1);
170#ifdef DISPLAY_STATS
171 std::cout << "---->>> STR points: " << strPoints << "\n";
172#endif
173 if (codeStrings.size() && !strPoints) {
174 return false;
175 }
176 // possible code
177 size_t ratiosPoints = 0;
178 std::map<BYTE, double> ratios;
179 ratios[0x00] = 0.1;
180 ratios[0x0F] = 0.01;
181 ratios[0x48] = 0.02;
182 ratios[0x8B] = 0.02;
183 ratios[0xCC] = 0.01;
184 ratios[0xE8] = 0.01;
185 ratios[0xFF] = 0.02;
186
187 ratiosPoints += checkRatios(stats, ratios);
188#ifdef DISPLAY_STATS
189 std::cout << "---->>> CODE points: " << ratiosPoints << "\n";
190#endif
191 if (ratiosPoints < kMinCodePoints) {
192 return false;
193 }
194 if (ratiosPoints >= (ratios.size() / 2 + 1)) {
195 return true;
196 }
197 if (strPoints < kMinStrPoints) {
198 return false;
199 }
200 return true;
201 }
202 };
203
204
206 {
207 public:
209 : RuleMatcher("possible_obfuscated") {}
210
211 virtual bool _isMatching(IN const AreaMultiStats& stats)
212 {
213 const double kMinNBRatio = 0.17;
214 BYTE mFreqVal = getMostFrequentValue(stats.currArea.frequencies);
215 double entropy = stats.currArea.entropy;
216 const size_t populationSize = stats.currArea.histogram.size();
217
218 if (populationSize < (CHARSET_SIZE / 3)) {
219 return false;
220 }
221 bool entropyT = (mFreqVal != 0 && entropy > ENTROPY_DATA_TRESHOLD); // possible XOR obfuscation, or block cipher
222 if (!entropyT) {
223 return false;
224 }
225
226 StdDeviationCalc dev(stats.currArea.histogram, populationSize);
227 const double mean = dev.getMean();
228 const size_t nB = valuesNotBelowMean(stats.currArea, mean);
229 const double nBRatio = (double)nB / (double)populationSize;
230 if (nBRatio > 0.5) {
231 return true; // possible strong encryption
232 }
233
234 // filter out texts:
235 const double printRatio = getPrintableRatio(stats);
236 if (printRatio > 0.8) {
237 return false;
238 }
239 if (entropy < ENTROPY_ENC_TRESHOLD && printRatio > 0.6) {
240 return false;
241 }
242 double stDev = dev.calcSampleStandardDeviation();
243 /*
244 const size_t topVal = stats.currArea.frequencies.rbegin()->first;
245 const size_t bottomVal = stats.currArea.frequencies.begin()->first;
246 double diff = topVal - bottomVal;
247
248 double valSpread = diff / stDev;
249 */
250 std::set<BYTE>peaks;
251 size_t peaksCount = fetchPeakValues(stats.currArea, stDev, 2, peaks);
252 double peaksRatio = (double)peaksCount / (double)populationSize;
253 if (peaksRatio > 0.4) { // possible strong encryption
254 return true;
255 }
256 if (peaks.find(0) == peaks.end()) {
257 // 0 is not among the peaks:
258 return true;
259 }
260 if (nBRatio < kMinNBRatio) {
261 return false;
262 }
263#ifdef DISPLAY_STATS
264 std::cout << "All peaks: \n";
265 for (auto itr = peaks.begin(); itr != peaks.end(); itr++) {
266 std::cout << std::hex << (UINT)*itr << " ";
267 }
268 std::cout << "\n";
269#endif
270 return false;
271 }
272 };
273
274
276 {
277 public:
279 : RuleMatcher("possible_encrypted") {}
280
281 virtual bool _isMatching(IN const AreaMultiStats& stats)
282 {
283 double entropy = stats.currArea.entropy;
284 const BYTE mFreqVal = getMostFrequentValue(stats.currArea.frequencies);
285 bool fullAreaEncrypted = (entropy > ENTROPY_STRONG_ENC_TRESHOLD);// strong encryption
286 if (mFreqVal != 0 && entropy > ENTROPY_ENC_TRESHOLD) {
287 if (stats.currArea.frequencies.size() > 1) {
288 auto fItr = stats.currArea.frequencies.begin(); // first one
289 auto eItr = stats.currArea.frequencies.rbegin(); // last one
290 // most common - least common ratio
291 double diff = ((double)(eItr->first - fItr->first)) / (double)stats.currArea.size;
292 //std::cout << "RATIO : " << fItr->first << " VS " << eItr->first << " DIFF: " << diff << "\n";
293 if (diff < 0.01) {
294 fullAreaEncrypted = true;
295 }
296 }
297 }
298 return fullAreaEncrypted;
299 }
300 };
301
303 {
304 public:
306 : RuleMatcher("possible_text") {}
307
308 virtual bool _isMatching(IN const AreaMultiStats& stats)
309 {
310 bool possibleText = false;
311 const double printRatio = getPrintableRatio(stats);
312 if (printRatio > 0.8) {
313 possibleText = true;
314 }
315 return possibleText;
316 }
317 };
318
319 //---
320
321 void RuleMatchersSet::initRules(DWORD ruleTypes)
322 {
323 if (ruleTypes & RuleMatcher::RULE_CODE) {
324 matchers.push_back(new CodeMatcher());
325 }
326 if (ruleTypes & RuleMatcher::RULE_TEXT) {
327 this->matchers.push_back(new TextMatcher());
328 }
329 if (ruleTypes & RuleMatcher::RULE_ENCRYPTED) {
330 matchers.push_back(new EncryptedMatcher());
331 }
332 if (ruleTypes & RuleMatcher::RULE_OBFUSCATED) {
333 matchers.push_back(new ObfuscatedMatcher());
334 }
335 }
336
// Body of findMatches(IN const AreaMultiStats& stats, OUT AreaInfo& info), as named in the symbol index at the end
// of this listing; the signature line itself (listing line 337) is not present in this extract.
// Runs every matcher stored in `matchers` against `stats`, appends the name of each matching rule
// to info.matchedRules, and returns the number of rules that matched.
338 {
// statistics that were not filled cannot be evaluated: report it on stdout and leave `info` untouched
339 if (!stats.isFilled()) {
340 std::cout << "Stat not filled!\n";
// NOTE(review): the symbol index lists the return type as size_t, so `false` presumably converts to 0 matches - confirm
341 return false;
342 }
343
344 size_t matched = 0;
345 for (auto itr = matchers.begin(); itr != matchers.end(); ++itr) {
346 RuleMatcher* m = *itr;
// null entries are skipped
347 if (!m) continue;
348 if (m->isMatching(stats)) {
349 info.matchedRules.push_back(m->name);
350 matched++;
351 }
352 }
353 return matched;
354 }
355
356}; //namespace pesieve
virtual bool _isMatching(IN const AreaMultiStats &stats)
virtual bool _isMatching(IN const AreaMultiStats &stats)
virtual bool _isMatching(IN const AreaMultiStats &stats)
bool isMatching(IN const AreaMultiStats &stats)
virtual bool _isMatching(IN const AreaMultiStats &stats)
size_t valuesNotBelowMean(IN const ChunkStats &currArea, double mean)
double getPrintableRatio(IN const AreaMultiStats &stats)
size_t fillCodeStrings(OUT std::set< std::string > &codeStrings)
size_t fetchPeakValues(IN const ChunkStats &currArea, IN double stdDev, int devCount, OUT std::set< BYTE > &peaks)
BYTE getMostFrequentValue(IN const std::map< size_t, std::set< T > > &frequencies)
Definition stats_util.h:35
double getValRatio(IN const AreaMultiStats &stats, BYTE val)
size_t checkRatios(IN const AreaMultiStats &stats, IN std::map< BYTE, double > &ratios)
std::string info()
The string with the basic information about the scanner.
Definition pe_sieve.cpp:274
size_t countFoundStrings(IN const AreaMultiStats &stats, IN const std::set< std::string > &neededStrings, IN size_t minOccurrence)
#define ENTROPY_DATA_TRESHOLD
#define CHARSET_SIZE
#define ENTROPY_CODE_TRESHOLD
#define ENTROPY_STRONG_ENC_TRESHOLD
#define ENTROPY_ENC_TRESHOLD
#define CODE_RULE
#define IS_PRINTABLE(c)
Definition strings_util.h:8
Statistics from a block of data.
Definition multi_stats.h:54
void initRules(DWORD ruleTypes)
size_t findMatches(IN const AreaMultiStats &stats, OUT AreaInfo &info)
std::vector< RuleMatcher * > matchers