PE-sieve
Scans all running processes. Recognizes and dumps a variety of potentially malicious implants (replaced/implanted PEs, shellcodes, hooks, in-memory patches).
Loading...
Searching...
No Matches
multi_stats.h
Go to the documentation of this file.
1#pragma once
2
3#include <windows.h>
4#include <iostream>
5#include <string>
6#include <set>
7
8#include "entropy.h"
9#include "stats.h"
10#include "stats_util.h"
12#include "../utils/path_util.h"
13
14namespace pesieve {
15
18 {
19 public:
24
25 // Copy constructor
30
31 bool isFilled()
32 {
33 return (watchedStrings.size() != 0) ? true : false;
34 }
35
37 std::string hasWatchedSubstring(std::string& lastStr)
38 {
39 for (auto itr = watchedStrings.begin(); itr != watchedStrings.end(); ++itr) {
40 const std::string s = *itr;
41 if (lastStr.find(s) != std::string::npos && s.length()) {
42 //std::cout << "[+] KEY for string: " << lastStr << " found: " << s << "\n";
43 return s; // the current string contains searched string
44 }
45 }
46 //std::cout << "[-] KEY for string: " << lastStr << " NOT found!\n";
47 return "";
48 }
49
50 std::set<std::string> watchedStrings;
51 };
52
54 struct ChunkStats {
55 //
57 : size(0), offset(0), entropy(0), longestStr(0), prevVal(0),
59 {
60 }
61
62 ChunkStats(size_t _offset, size_t _size)
63 : size(_size), offset(_offset), entropy(0), longestStr(0), prevVal(0),
65 {
66 }
67
68 // Copy constructor
70 : size(p1.size), offset(p1.offset),
73 {
74#ifdef _KEEP_STR
75 allStrings = p1.allStrings;
76#endif //_KEEP_STR
79 settings = p1.settings;
82 }
83
85 {
86 settings = _settings;
87 }
88
89 void appendVal(BYTE val)
90 {
91
92 size++;
93 histogram[val]++;
94 prevVal = val;
95
96 // scan strings:
97 const bool isPrint = IS_PRINTABLE(val);
98 if (isPrint) {
99 lastStr += char(val);
100 }
101 else {
102 const bool isClean = (val == 0) ? true : false; //terminated cleanly?
103 finishLastStr(isClean);
104 lastStr.clear();
105 }
106 }
107
108 void finishLastStr(bool isClean)
109 {
110 if (lastStr.length() < 2) {
111 return;
112 }
113 stringsCount++;
114 if (isClean) cleanStringsCount++;
115
116 if (settings) {
117 std::string key = settings->hasWatchedSubstring(lastStr);
118 if (key.length()) {
119 foundStrings[key]++; // the current string contains searched string
120 }
121 }
122#ifdef _KEEP_STR
123 allStrings.push_back(lastStr);
124#endif //_KEEP_STR
125 //std::cout << "-----> lastStr:" << lastStr << "\n";
126 if (lastStr.length() > longestStr) {
127 longestStr = lastStr.length();
128 }
129 lastStr.clear();
130 }
131
132 const virtual void fieldsToJSON(std::stringstream& outs, size_t level)
133 {
134 OUT_PADDED(outs, level, "\"offset\" : ");
135 outs << std::hex << "\"" << offset << "\"";
136 outs << ",\n";
137 OUT_PADDED(outs, level, "\"size\" : ");
138 outs << std::hex << "\"" << size << "\"";
139 outs << ",\n";
140 OUT_PADDED(outs, level, "\"charset_size\" : ");
141 outs << std::dec << histogram.size();
142
143 std::set<BYTE> values;
144 size_t freq = stats::getMostFrequentValues(frequencies, values, 16, 8);
145 if (freq && values.size()) {
146 outs << ",\n";
147 OUT_PADDED(outs, level, "\"most_freq_vals\" : ");
148 outs << std::hex << "\"";
150 outs << "\"";
151 }
152 outs << ",\n";
153 OUT_PADDED(outs, level, "\"entropy\" : ");
154 outs << std::dec << entropy;
155 }
156
158 {
160 finishLastStr(true);
161
162 for (auto itr = histogram.begin(); itr != histogram.end(); ++itr) {
163 const size_t count = itr->second;
164 const BYTE val = itr->first;
165 frequencies[count].insert(val);
166 }
167 }
168
169 double entropy;
170 size_t size;
171 size_t offset;
172
174 size_t longestStr; // the longest ASCII string in the chunk
175
176 std::string lastStr;
179 std::map<BYTE, size_t> histogram;
180 std::map<size_t, std::set<BYTE>> frequencies;
181
183
184 std::map<std::string, size_t> foundStrings;
185#ifdef _KEEP_STR
186 std::vector< std::string > allStrings;
187#endif
188 };
189
190 class AreaMultiStats : public AreaStats {
191 public:
193 {
194 }
195
196 // Copy constructor
198 : currArea(p1.currArea)
199 {
200 }
201
203 {
204 MultiStatsSettings* multiSettings = dynamic_cast<MultiStatsSettings*>(settings);
205 if (!multiSettings) return false;
206
207 currArea.fillSettings(multiSettings);
208 return true;
209 }
210
211 const virtual void fieldsToJSON(std::stringstream& outs, size_t level)
212 {
213 OUT_PADDED(outs, level, "\"full_area\" : {\n");
214 currArea.fieldsToJSON(outs, level + 1);
215 outs << "\n";
216 OUT_PADDED(outs, level, "}");
217 }
218
219 bool isFilled() const
220 {
221 return (currArea.size != 0) ? true : false;
222 }
223
225 {
227 }
228
229 ChunkStats currArea; // stats from the whole area
230
231 protected:
232 void _appendVal(BYTE val)
233 {
234 currArea.appendVal(val);
235 }
236
237 };
238};
void _appendVal(BYTE val)
bool fillSettings(StatsSettings *settings)
virtual const void fieldsToJSON(std::stringstream &outs, size_t level)
AreaMultiStats(const AreaMultiStats &p1)
Base class for the statistics from analyzed buffer.
Definition stats.h:20
#define OUT_PADDED(stream, field_size, str)
Definition format_util.h:12
size_t getMostFrequentValues(IN const std::map< size_t, std::set< T > > &frequencies, OUT std::set< T > &values, IN OPTIONAL size_t top=0, IN OPTIONAL size_t maxDiff=0)
Definition stats_util.h:48
double calcShannonEntropy(std::map< T, size_t > &histogram, size_t totalSize)
Definition entropy.h:22
std::string hexdumpValues(std::set< T > &values)
Definition stats_util.h:23
std::string escape_path_separators(std::string path)
Definition path_util.cpp:27
#define IS_PRINTABLE(c)
Definition strings_util.h:8
Statistics from a block of data.
Definition multi_stats.h:54
std::map< size_t, std::set< BYTE > > frequencies
MultiStatsSettings * settings
void finishLastStr(bool isClean)
void appendVal(BYTE val)
Definition multi_stats.h:89
ChunkStats(size_t _offset, size_t _size)
Definition multi_stats.h:62
std::string lastStr
void fillSettings(MultiStatsSettings *_settings)
Definition multi_stats.h:84
ChunkStats(const ChunkStats &p1)
Definition multi_stats.h:69
std::map< BYTE, size_t > histogram
virtual const void fieldsToJSON(std::stringstream &outs, size_t level)
std::map< std::string, size_t > foundStrings
Settings defining what type of stats should be collected.
Definition multi_stats.h:18
std::string hasWatchedSubstring(std::string &lastStr)
Searches the given string for any of the watchedStrings. If one of them is found as a substring, returns the corresponding watched string; otherwise returns an empty string.
Definition multi_stats.h:37
MultiStatsSettings(const MultiStatsSettings &p1)
Definition multi_stats.h:26
std::set< std::string > watchedStrings
Definition multi_stats.h:50
Base class for settings defining what type of stats should be collected.
Definition stats.h:13